In [1]:
from __future__ import division

import glob
import os

import librosa
import medleydb as mdb

import numpy as np
import scipy

import matplotlib.pyplot as plt
import IPython.display

import seaborn as sns
%matplotlib inline


import jams
import itertools

from onsetGroundTruth2 import *


:0: FutureWarning: IPython widgets are experimental and may change in the future.

In [24]:
def computeEnvelope(s):
    """Return the amplitude envelope of a signal.

    The envelope is the magnitude of the analytic signal obtained with
    ``scipy.signal.hilbert`` (applied along the last axis, which is that
    function's default).

    Parameters
    ----------
    s : array_like
        Real-valued input signal.

    Returns
    -------
    numpy.ndarray
        Non-negative envelope with the same shape as ``s``.
    """
    # Import the subpackage explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.signal`` is loaded, so ``scipy.signal.hilbert``
    # could raise AttributeError depending on what else was imported.
    from scipy.signal import hilbert

    analytic_signal = hilbert(s)
    amplitude_envelope = np.abs(analytic_signal)

    return amplitude_envelope

def computeSNR(gain, stemsAudio, mixAudio):
    """Signal-to-noise ratio of a gain-weighted stem sum against the mix.

    The stems are scaled by ``gain``, summed along axis 1, and compared with
    ``mixAudio``; the result is ``power_db(mixAudio)`` minus ``power_db`` of
    the residual.

    Parameters
    ----------
    gain : array_like
        Gains multiplied (with NumPy broadcasting) into ``stemsAudio``.
    stemsAudio : numpy.ndarray
        Stem audio; presumably (samples, stems) - TODO confirm against
        ``loadAudio``.
    mixAudio : numpy.ndarray
        Mix audio the weighted stem sum is compared with.

    Returns
    -------
    Difference of the two ``power_db`` values (``power_db`` comes from the
    ``onsetGroundTruth2`` star import).
    """
    scaled_stems = np.array(gain) * stemsAudio
    # residual between the reconstructed mix and the actual mix
    residual = scaled_stems.sum(axis=1) - mixAudio
    return power_db(mixAudio) - power_db(residual)

In [25]:
# List the entries found under the MedleyDB audio path (names only; the
# audio itself is loaded later, per track). os.listdir returns entries in
# arbitrary order, so sort them to keep the processing order - and hence the
# row order of the SNR table - reproducible between runs.
trackList = sorted(os.listdir(mdb.AUDIO_PATH))

In [26]:
# --- Analysis parameters ---
sr = 44100                        # sample rate handed to loadAudio
gainWindow = int(sr*0.25)         # 0.25 * sr samples
halfGainWindow = gainWindow // 2  # last argument of estimateGain (presumably the hop size - TODO confirm)
temporalThreshold = sr*0.05       # 0.05 * sr samples
loudnessThreshold = -20

# Human-readable summary of the settings. The sample counts are divided by
# sr (giving seconds) and scaled by 1000 so the value matches the 'ms' unit.
g ='Gain Window: ' + str(1000*gainWindow/sr) + 'ms'
t = 'Temporal Threshold: ' + str(1000*temporalThreshold/sr) + 'ms'
l = 'Loudness Threshold: ' + str(loudnessThreshold) + 'dB'

annotationRules = "\n".join([g,t,l])

# multitrack generator
mtrack_generator = mdb.load_multitracks(trackList)

# one row per processed track: [snr_signal, snr_abs, snr_env]
SNR = []

for track in mtrack_generator:
    
    # only compute annotations for tracks without bleed
    if track.has_bleed == False:
        
        ### Load Things ###
        
        # data paths
        mixedAudioPath = track.mix_path
        stemsPathList = track.stem_filepaths()
        
        # audio
        mixAudio, stemsAudio = loadAudio(mixedAudioPath,stemsPathList, sr = sr)
        
        # compute envelope (for the stems: one envelope per row of
        # stemsAudio.T, transposed back to the layout of stemsAudio)
        mixAudioEnvelope = np.array(computeEnvelope(mixAudio))
        stemsAudioEnvelope = np.array([computeEnvelope(s) for s in stemsAudio.T]).T 
        
        
        ### Gain Estimation ###
        
        # estimate gain per stem from three representations of the audio:
        # raw signal, rectified signal (absolute value) and Hilbert envelope
        gain_signal = estimateGain(mixAudio, stemsAudio, gainWindow, halfGainWindow)
        gain_abs = estimateGain(np.abs(mixAudio), np.abs(stemsAudio), gainWindow, halfGainWindow)
        gain_env = estimateGain(mixAudioEnvelope, stemsAudioEnvelope, gainWindow, halfGainWindow)
        
        # SNR of the gain-weighted stem sum against the mix; every gain
        # estimate is evaluated on the raw (unrectified) audio
        snr_signal = computeSNR(gain_signal, stemsAudio, mixAudio)
        snr_abs = computeSNR(gain_abs, stemsAudio, mixAudio)
        snr_env = computeSNR(gain_env, stemsAudio, mixAudio)
        
        SNR.append([snr_signal, snr_abs, snr_env]) 
        
# Keep the converted array (the conversion result used to be discarded, which
# left SNR as a list that cannot be sliced column-wise), then display it.
SNR = np.array(SNR)
SNR


  File "<ipython-input-26-3b7eeab87110>", line 51
    np.array(SNR).
                  ^
SyntaxError: invalid syntax

In [27]:
# Display the collected SNR values.
# NOTE(review): the stored output below has two columns, while the loop above
# appends three values per track (signal, abs, env) - the output looks stale
# and should be regenerated.
SNR


Out[27]:
array([[  6.58314241e+00,   5.61531720e+00],
       [  2.34337320e+01,   1.07563639e+01],
       [  2.09322546e+01,   6.72052058e+00],
       [  4.30378640e+01,   4.04275522e+00],
       [  6.67041579e+00,   5.02534412e+00],
       [  2.25888056e+01,   7.42853871e+00],
       [  7.99713661e+00,   4.94846127e+00],
       [  7.76113173e+01,   6.53072026e+00],
       [  1.84994721e+01,   7.08131500e+00],
       [  3.02409745e+01,   5.35663470e+00],
       [  3.88780437e+00,   2.62803767e+00],
       [  4.92826852e+01,   8.58280320e+00],
       [  8.55480361e+00,   4.89428649e+00],
       [  1.88971215e+01,   6.69961105e+00],
       [  7.04567524e+01,   3.20499211e+00],
       [  7.49359014e+01,   7.52737304e+00],
       [  4.83147314e+01,   6.23738383e+00],
       [  7.04449971e+00,   5.01031340e+00],
       [  1.31356773e+01,   7.36903173e+00],
       [  4.08118030e+01,   3.78376786e+00],
       [  5.37850602e+00,   3.85892470e+00],
       [  9.59567260e+00,   7.35540969e+00],
       [  1.62656793e-01,  -1.73981889e+00],
       [  2.33078016e+01,   6.64550868e+00],
       [  2.57585177e+01,   6.94345487e+00],
       [  4.43926341e+01,   1.16242791e+01],
       [  7.24421097e+00,   5.18578358e+00],
       [  7.80147987e+01,   7.18923815e+00],
       [  2.89884354e+02,   2.89884354e+02],
       [  8.26375481e+00,   4.55733764e+00],
       [  3.41324429e+00,   2.15598706e+00],
       [  1.70800270e+01,   1.52356427e+01],
       [  4.52136589e+00,   3.11493443e+00],
       [  7.56351714e+01,   7.94309909e+00],
       [  8.14277166e+00,   5.27186950e+00],
       [  3.41828094e+01,   9.56848839e+00],
       [  1.21499866e+00,   7.91724680e-02],
       [  1.34477793e+01,   7.43164501e+00],
       [  1.27536130e+00,   4.28105788e-01],
       [  7.94363244e+01,   6.82086752e+00],
       [  5.09497891e+01,   6.10254987e+00],
       [  2.85483491e+01,   5.24945993e+00],
       [  1.67323443e+01,   4.93457920e+00],
       [  1.31058903e+01,   6.00079406e+00],
       [  4.74303409e+00,   3.10862884e+00],
       [  5.31482696e+01,   5.75735117e+00],
       [  2.35648095e+01,   6.86763215e+00],
       [  1.03896743e+01,   5.56094447e+00],
       [  5.38022271e+00,   3.77897129e+00],
       [  4.04640994e+01,   7.73410229e+00],
       [  4.26401023e+01,   9.65698830e+00],
       [  2.70889185e+01,   6.99011525e+00],
       [  1.25831312e+01,   9.02325962e+00],
       [  4.37050041e+00,   2.95336488e+00],
       [  1.56264125e+00,   3.49663415e-01],
       [  8.76635563e+00,   6.63952940e+00],
       [  1.05402346e+01,   4.68811560e+00],
       [  1.54748168e+01,   5.87249105e+00],
       [  2.19255965e+00,   1.27491429e+00],
       [  1.02380914e+01,   8.03998335e+00],
       [  4.44749107e+00,   2.62899042e+00],
       [  1.23797880e+01,   6.63638174e+00],
       [  2.96741702e+00,   1.74816533e+00],
       [  1.47014055e+00,   8.07991990e-01],
       [  5.79487400e+01,   8.64286929e+00],
       [  3.34349736e+01,   6.62734211e+00],
       [  6.39160767e+00,   3.13058945e+00],
       [  2.79046563e+00,   1.99665446e+00],
       [  6.63060081e+00,   5.16057748e+00],
       [  4.05177028e+00,   2.71234320e+00],
       [  5.09652749e+00,   3.18587442e+00],
       [  2.31803181e+01,   7.44934923e+00],
       [  5.09302394e+00,   3.37589582e+00],
       [  1.48648709e+00,   8.45111304e-01],
       [  2.48102964e+00,   6.73050927e-01],
       [  1.89268964e+01,   9.08894585e+00],
       [  2.87131580e+01,   7.29970762e+00],
       [  1.43533163e+00,   2.80430862e-01],
       [  6.68382679e+01,   1.01365343e+01],
       [  3.65972199e+00,   2.83562760e+00],
       [  1.21058095e+00,  -3.81690428e-01],
       [  1.38794916e+01,   6.46095517e+00],
       [  1.26117353e+01,   9.90945218e+00],
       [  5.60249987e+01,   7.94263997e+00],
       [  7.42596865e+01,   1.35468156e+01],
       [  3.25174614e+01,   7.42884639e+00],
       [  1.09474307e+00,   4.49486636e-01],
       [  6.99788047e+00,   4.28223639e+00],
       [  7.13857067e+01,   8.58348549e+00],
       [  7.11313336e+00,   4.68742843e+00],
       [  8.33993139e+00,   5.17608057e+00],
       [  1.15516549e+01,   7.14276310e+00],
       [  3.26072579e+01,   5.67921625e+00],
       [  4.59558263e+01,   7.78128284e+00],
       [  6.75333331e+01,   9.79720880e+00],
       [  5.49408710e+00,   3.18570882e+00],
       [  8.29060376e+00,   4.67626139e+00]])

Analyze SNR results from full data


In [21]:
# Load SNR results from 'gainExp.npy', rebinding SNR (this replaces whatever
# the experiment cell above left in it).
# NOTE(review): no visible cell writes 'gainExp.npy', and this cell's
# execution count (21) is lower than the cells above (24-27) - confirm the
# file matches the current experiment before relying on the plots below.
SNR = np.load('gainExp.npy')

In [22]:
# Histogram (default 10 bins) of the second column of SNR.
# NOTE(review): which gain-estimation variant this column holds is not
# recorded here - confirm. In the stored output a single value near 290 puts
# the other 96 values into the first bin, so the plot shows little detail.
plt.hist(SNR[:,1])


Out[22]:
(array([ 96.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   1.]),
 array([  -1.73981889,   27.4225984 ,   56.5850157 ,   85.747433  ,
         114.90985029,  144.07226759,  173.23468488,  202.39710218,
         231.55951948,  260.72193677,  289.88435407]),
 <a list of 10 Patch objects>)

In [23]:
# Histogram (default 10 bins) of the first column of SNR.
# NOTE(review): which gain-estimation variant this column holds is not
# recorded here - confirm. The stored output has one value in the last bin
# (up to ~290) that stretches the x-axis range.
plt.hist(SNR[:,0])


Out[23]:
(array([ 69.,  17.,  10.,   0.,   0.,   0.,   0.,   0.,   0.,   1.]),
 array([  1.62656793e-01,   2.91348265e+01,   5.81069962e+01,
          8.70791660e+01,   1.16051336e+02,   1.45023505e+02,
          1.73995675e+02,   2.02967845e+02,   2.31940015e+02,
          2.60912184e+02,   2.89884354e+02]),
 <a list of 10 Patch objects>)